import os
import urllib
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from lxml import etree

# Entry page of the free resume-template listing on sc.chinaz.com.
url = 'http://sc.chinaz.com/jianli/free.html'
# Browser-like UA so the site does not reject the scraper outright.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
}

# Local directory that receives the downloaded .rar templates.
dirName = 'resumes'
# makedirs(exist_ok=True) is atomic w.r.t. the existence check — avoids the
# check-then-create race of `os.path.exists` + `os.mkdir`.
os.makedirs(dirName, exist_ok=True)

# Crawl listing pages 1-5; page 1 has no numeric suffix, the rest are free_{n}.html.
for page in range(1, 6):
    page_url = url if page == 1 else f'http://sc.chinaz.com/jianli/free_{page}.html'

    # timeout prevents the whole crawl from hanging on one stuck connection.
    response = requests.get(url=page_url, headers=headers, timeout=10)
    response.encoding = 'utf-8'  # force UTF-8 so the Chinese alt-text titles decode correctly
    page_text = response.text

    tree = etree.HTML(page_text)
    # Each <a> wraps one template thumbnail: img/@alt is the title, @href the detail page.
    a_list = tree.xpath('//div[@id="container"]//div/a')

    for a in a_list:
        alts = a.xpath('./img/@alt')  # xpath calls can be chained on element nodes
        hrefs = a.xpath('./@href')
        if not alts or not hrefs:
            # Skip malformed entries instead of crashing with IndexError.
            continue
        title = alts[0] + '.rar'
        # Listing hrefs may be relative or protocol-relative; resolve against the
        # page URL (urljoin is a no-op for already-absolute URLs).
        detail_url = urljoin(page_url, hrefs[0])
        detail_page_text = requests.get(url=detail_url, headers=headers, timeout=10).text

        detail_tree = etree.HTML(detail_page_text)
        # First mirror link in the download box.
        down_links = detail_tree.xpath('//*[@id="down"]//ul/li[1]/a/@href')
        if not down_links:
            continue
        download_url = urljoin(detail_url, down_links[0])
        file_path = os.path.join(dirName, title)
        # Longer timeout for the archive itself; write as binary.
        archive = requests.get(url=download_url, headers=headers, timeout=30)
        with open(file_path, 'wb') as f:
            f.write(archive.content)
        print(title, "爬取完成！")
